Gorilla to Tidy Data: Cognitive Control and Motivated Reasoning

Data cleaning, data checks, and some preparations for further analyses.
# use groundhog to make code maximally reproducible
# requireNamespace() only checks whether the package is installed (without
# attaching it); the package is attached exactly once by library() below
if (!requireNamespace("groundhog", quietly = TRUE)) {
  install.packages("groundhog")
}
library("groundhog")

# use groundhog to install and load packages
pkgs <- c("here",         # System path management
          "tidyverse",    # ggplot, dplyr, %>%, and friends
          "tinytable"     # Lightweight package to create tables
          )

groundhog.library(pkgs, "2024-07-01") 

Load the data

I have data from different versions of the tasks and questionnaires. The following code automatically loads them from the specified path and renames them with the task | questionnaire identifier.

# Define the path to stored raw data
raw_dir <- here("01_data", "raw")

# List all CSV files in the folder.
# `pattern` is a regular expression (not a shell glob), so match the literal
# ".csv" extension anchored at the end of the file name.
raw_files_list <- list.files(
  path = raw_dir,
  pattern = "\\.csv$",
  full.names = TRUE
)

# Read one raw CSV export and store it as a data frame in the global
# environment.
#
# The file name is matched against
# "data_exp_.*_(questionnaire|task)-(.*)\\.csv"; the data frame is assigned to
# `data_<questionnaire|task>_<identifier>` in `.GlobalEnv`. Rows without a
# "Participant Private ID" are dropped before assignment.
#
# @param file Path to a single CSV file.
# @return The filtered data frame, invisibly (the value returned by
#   `assign()`), or `NULL` invisibly when the file name does not match the
#   expected pattern (a warning is raised and nothing is assigned).
read_and_assign_simple <- function(file) {
  # Extract parts of the file name
  file_name <- basename(file)
  parts <- str_match(file_name, "data_exp_.*_(questionnaire|task)-(.*)\\.csv")

  # str_match() yields NA when the pattern does not match; skip such files
  # with a warning instead of silently creating an object named "data_NA_NA"
  if (is.na(parts[1])) {
    warning(
      "Skipping file with unexpected name: ", file_name,
      call. = FALSE
    )
    return(invisible(NULL))
  }

  task_or_questionnaire <- parts[2]
  identifier <- parts[3]

  # Create the new name
  new_name <- paste0("data_", task_or_questionnaire, "_", identifier)

  # Read the CSV file
  df <- read_csv(file)

  # Filter out rows where "Participant Private ID" is NA
  df <- df %>% filter(!is.na(`Participant Private ID`))

  # Assign the dataframe to the new name in the global environment
  assign(new_name, df, envir = .GlobalEnv)
}

# Apply the function to all files. walk() is used because only the side
# effect (assigning the data frames) is needed; map() would additionally
# return, and print, a list containing every data frame.
walk(raw_files_list, read_and_assign_simple)

Data cleaning

In this section, I want to remove any unnecessary rows and columns and rename the dataframes with something more intuitive.

I start by creating a tibble that matches each task/questionnaire identifier with a name.

# Lookup table pairing each task/questionnaire identifier with a readable name
identifier_names <- tibble(
  identifier = c("svnz", "av22", "uamn", "8o8a", "yknh", "nn4b"),
  name = c(
    "consent",
    "questionnaire_pre",
    "go_nogo_m",
    "go_nogo_p",
    "fake_news_game",
    "debrief"
  )
)

tt(identifier_names, theme = "striped")
tinytable_0cd48fpwsovgd5fuxtv5
identifier name
svnz consent
av22 questionnaire_pre
uamn go_nogo_m
8o8a go_nogo_p
yknh fake_news_game
nn4b debrief

Select, filter, and rename variables

Questionnaires (av22)

# Keep the participant ID plus every questionnaire item (dropping the
# "Quantised" duplicates), then give the items short snake_case names.
# The rename uses a named character vector: names are the new column names,
# values are the original column names.
data_questionnaires <- data_questionnaire_av22 %>%
  select(
    `Participant Private ID`,
    `age object-18 Month`:`dog11 object-131 Quantised`,
    -contains("Quantised")
  ) %>%
  rename(all_of(c(
    # demographics
    age_month = "age object-18 Month",
    age_year = "age object-18 Year",
    gender = "gender object-6 Response",
    gender_other = "gender object-6 Other",
    worksit = "worksit object-19 Response",
    worksit_other = "worksit object-19 Other",
    education = "education object-11 Response",
    education_other = "education object-11 Other",
    # politics
    ideology = "ideology object-38 Response",
    partisanship = "partisanship object-23 Response",
    partisanship_other = "partisanship object-23 Other",
    conservative_rating = "conservative object-24 Value",
    labour_rating = "labour object-27 Value",
    libdem_rating = "libdem object-28 Value",
    green_rating = "green object-29 Value",
    reform_rating = "reform object-30 Value",
    # CRT items
    crt1 = "CRT1 object-31 Value",
    crt2 = "CRT2 object-33 Value",
    crt3 = "CRT3 object-35 Value",
    # opinion items and attention check
    o_immigration = "immigration object-91 Response",
    o_climate = "climate object-92 Response",
    o_punishment = "punishment object-93 Response",
    o_teaculture = "teaculture object-94 Response",
    o_brain = "brain object-96 Response",
    attention_check = "attention object-97 Response",
    o_discrimination = "discrimination object-107 Response",
    o_cats = "cats object-108 Response",
    o_selfenhancement = "selfenhancement object-109 Response",
    o_adoption = "adoption object-110 Response",
    o_gender = "gender object-111 Response",
    # dog01-dog11 items
    dog01 = "dog01 object-112 Response",
    dog02 = "dog02 object-113 Response",
    dog03 = "dog03 object-114 Response",
    dog04 = "dog04 object-115 Response",
    dog05 = "dog05 object-116 Response",
    dog06 = "dog06 object-117 Response",
    dog07 = "dog07 object-127 Response",
    dog08 = "dog08 object-128 Response",
    dog09 = "dog09 object-129 Response",
    dog10 = "dog10 object-130 Response",
    dog11 = "dog11 object-131 Response"
  )))

Go / No-Go M Start (uamn)

# Go / No-Go (uamn): keep the relevant columns and shorten the names of the
# analysis variables in a single pipeline
data_gng_m_sel <- data_task_uamn %>%
  select(
    # participant / session information
    `Participant Private ID`, `UTC Date and Time`, `Experiment Version`,
    `Participant Device`, `Participant Browser`, `Task Name`,
    `randomiser-evbs`,
    # trial bookkeeping
    `Event Index`, `Trial Number`, Screen, `Component Name`, `Object Name`,
    `Response Type`,
    # responses and spreadsheet columns
    Response, `Reaction Time`, Correct,
    `Spreadsheet: stimulus`:`Spreadsheet: trial_id`,
    Display
  ) %>%
  rename(
    response = Response,
    rt = `Reaction Time`,
    correct = Correct,
    letter = `Spreadsheet: stimulus`,
    stimulus = `Spreadsheet: response`,
    trial_id = `Spreadsheet: trial_id`,
    condition = Display
  )

# Keep only the rows whose Response Type is "response"
data_gng_m <- filter(data_gng_m_sel, `Response Type` == "response")

Go / No-Go P Start (8o8a)

# Go / No-Go (8o8a): keep the relevant columns and shorten the names of the
# analysis variables in a single pipeline
data_gng_p_sel <- data_task_8o8a %>%
  select(
    # participant / session information
    `Participant Private ID`, `UTC Date and Time`, `Experiment Version`,
    `Participant Device`, `Participant Browser`, `Task Name`,
    `randomiser-evbs`,
    # trial bookkeeping
    `Event Index`, `Trial Number`, Screen, `Component Name`, `Object Name`,
    `Response Type`,
    # responses and spreadsheet columns
    Response, `Reaction Time`, Correct,
    `Spreadsheet: stimulus`:`Spreadsheet: trial_id`,
    Display
  ) %>%
  rename(
    response = Response,
    rt = `Reaction Time`,
    correct = Correct,
    letter = `Spreadsheet: stimulus`,
    stimulus = `Spreadsheet: response`,
    trial_id = `Spreadsheet: trial_id`,
    condition = Display
  )

# Keep only the rows whose Response Type is "response"
data_gng_p <- filter(data_gng_p_sel, `Response Type` == "response")

Fake News Game (yknh)

# Fake News Game (yknh): select the relevant columns and rename them in the
# same step (`new_name = old_name` inside select() keeps the listed order)
data_fake_news_sel <- data_task_yknh %>%
  select(
    `Participant Private ID`,
    `UTC Date and Time`,
    `Experiment Version`,
    `Participant Device`,
    `Participant Browser`,
    `Task Name`,
    `Task Version`,
    `Trial Number`,
    Display,
    `Response Type`,
    `Component Name`,
    Screen,
    Tag,
    rt = `Reaction Time`,
    trial_type = `Spreadsheet: task`,
    trial_id = `Spreadsheet: identifier`,
    question = `Spreadsheet: question`,
    question_topic = `Spreadsheet: name`,
    question_type = `Spreadsheet: type`,
    response = Response,
    message = `Spreadsheet: message`,
    correct_answer = `Spreadsheet: correct_answer`,
    guess_correct = Correct,
    response_duration = `Response Duration`
  )

# Keep only the rows whose Response Type is "response"
data_fake_news <- filter(data_fake_news_sel, `Response Type` == "response")

Debrief (nn4b)

# Debrief (nn4b): keep the participant ID and the end-of-study attention item,
# renaming the item while selecting it
data_debrief <- select(
  data_questionnaire_nn4b,
  `Participant Private ID`,
  attention_end = `attention_end object-3 Response`
)

Join relevant data frames

# Questionnaire-level data frames to merge, in join order
# NOTE(review): data_consent is not created in the visible part of this
# script -- confirm it is defined before this point
dfs <- list(data_consent, data_questionnaires, data_debrief)

# Full-join all data frames pairwise on the participant ID
data_questionnaires_combined <- dfs %>%
  reduce(full_join, by = "Participant Private ID")

# Show the first rows of the combined data
data_questionnaires_combined %>%
  head() %>%
  tt()
tinytable_64uvhct1r2wsj896rho9
Participant Private ID UTC Date and Time Experiment Version Participant Device Participant Browser randomiser-evbs attention_start attention_start object-14 Quantised consent1 consent2 consent3 consent4 recontact recontact object-19 Quantised age_month age_year gender gender_other worksit worksit_other education education_other ideology partisanship partisanship_other conservative_rating labour_rating libdem_rating green_rating reform_rating crt1 crt2 crt3 o_immigration o_climate o_punishment o_teaculture o_brain attention_check o_discrimination o_cats o_selfenhancement o_adoption o_gender dog01 dog02 dog03 dog04 dog05 dog06 dog07 dog08 dog09 dog10 dog11 attention_end
11693545 12/09/2024 08:16:23 38 Desktop or Laptop Chrome 130.0.0.0 Start P Yes 1 1 1 1 1 Yes 1 0 39 male NA Employed NA Higher secondary or further education (A-levels, T-levels, BTEC, International Baccalaureate or equivalent) NA Left Labour NA 3 91 43 84 1 8 10 39 Strongly disagree Strongly agree Disagree Neutral Neutral Strongly agree Strongly agree Disagree Agree Strongly agree Strongly disagree Disagree Strongly agree Strongly disagree Strongly agree Agree Disagree Agree Agree Strongly agree Strongly disagree Disagree Yes
11693585 12/09/2024 08:21:22 38 Desktop or Laptop Chrome 128.0.0.0 Start M Yes 1 1 1 1 1 Yes 1 9 36 female NA Employed NA Higher secondary or further education (A-levels, T-levels, BTEC, International Baccalaureate or equivalent) NA Right Conservative NA 94 0 0 10 91 8 50 80 Neutral Neutral Agree Agree Neutral Strongly agree Neutral Agree Agree Agree Disagree Disagree Agree Disagree Agree Agree Disagree Agree Agree Agree Disagree Neutral Yes
11693590 12/09/2024 08:21:53 38 Desktop or Laptop Chrome 128.0.0.0 Start P Yes 1 1 1 1 1 Yes 1 0 32 male NA Self-employed NA Bachelors degree (BA, BSc., BEd., BEng.) NA Left Green NA 5 20 30 85 0 4 50 39 Strongly disagree Strongly agree Strongly disagree Agree Strongly agree Strongly agree Strongly agree Strongly agree Disagree Strongly agree Strongly disagree Disagree Strongly agree Strongly disagree Strongly agree Strongly agree Disagree Strongly agree Strongly agree Strongly agree Strongly disagree Disagree Yes
11693627 12/09/2024 08:27:29 38 Desktop or Laptop Chrome 128.0.0.0 Start P Yes 1 1 1 1 1 Yes 1 1 36 female NA Employed NA Bachelors degree (BA, BSc., BEd., BEng.) NA Left Labour NA 27 89 23 35 2 8 50 20 Strongly disagree Strongly agree Disagree Neutral Neutral Strongly agree Strongly agree Agree Agree Strongly agree Strongly disagree Disagree Strongly agree Disagree Agree Agree Disagree Agree Strongly agree Strongly agree Disagree Disagree Yes
11693677 12/09/2024 08:39:20 38 Desktop or Laptop Chrome 128.0.0.0 Start M Yes 1 1 1 1 1 Yes 1 6 37 female NA Employed NA Higher secondary or further education (A-levels, T-levels, BTEC, International Baccalaureate or equivalent) NA Right Reform UK NA 0 0 1 0 75 8 50 20 Strongly agree Neutral Disagree Strongly agree Agree Strongly agree Neutral Agree Agree Agree Disagree Disagree Neutral Strongly disagree Strongly agree Strongly agree Strongly disagree Strongly agree Strongly agree Strongly agree Disagree Neutral Yes
11693689 12/09/2024 08:39:33 38 Desktop or Laptop Chrome 128.0.0.0 Start M Yes 1 1 1 1 1 Yes 1 6 35 male NA Employed NA Bachelors degree (BA, BSc., BEd., BEng.) NA Slightly right Conservative NA 60 15 42 12 10 8 2 39 Neutral Agree Neutral Neutral Disagree Disagree Agree Disagree Disagree Disagree Neutral Disagree Agree Disagree Agree Agree Disagree Agree Agree Agree Neutral Neutral Yes
# Stack the two Go / No-Go data frames into one
data_gng <- list(data_gng_m, data_gng_p) %>%
  bind_rows()

# Show the first rows
tt(head(data_gng))
tinytable_achk4ujh27i9ebyvx0c6
Participant Private ID UTC Date and Time Experiment Version Participant Device Participant Browser Task Name randomiser-evbs Event Index Trial Number Screen Component Name Object Name Response Type response rt correct letter stimulus trial_id condition
11693585 12/09/2024 08:28:20 38 Desktop or Laptop Chrome 128.0.0.0 Go / No-Go Task (M condition) Start M 7 1 Stimulus Keyboard Response go response go 450 1 M go practice_006 Practice M
11693585 12/09/2024 08:28:21 38 Desktop or Laptop Chrome 128.0.0.0 Go / No-Go Task (M condition) Start M 10 2 Stimulus Keyboard Response go response go 500 1 M go practice_008 Practice M
11693585 12/09/2024 08:28:23 38 Desktop or Laptop Chrome 128.0.0.0 Go / No-Go Task (M condition) Start M 13 3 Stimulus Time Limit screen response nogo 500 1 W nogo practice_009 Practice M
11693585 12/09/2024 08:28:24 38 Desktop or Laptop Chrome 128.0.0.0 Go / No-Go Task (M condition) Start M 16 4 Stimulus Keyboard Response go response go 392 1 M go practice_004 Practice M
11693585 12/09/2024 08:28:26 38 Desktop or Laptop Chrome 128.0.0.0 Go / No-Go Task (M condition) Start M 19 5 Stimulus Time Limit screen response nogo 500 1 W nogo practice_010 Practice M
11693585 12/09/2024 08:28:27 38 Desktop or Laptop Chrome 128.0.0.0 Go / No-Go Task (M condition) Start M 22 6 Stimulus Keyboard Response go response go 364 1 M go practice_007 Practice M

Remove participants who failed attention check

As pre-registered, I remove participants who failed a very obvious attention check right at the beginning of the study. One participant had to be removed due to ethical reasons as they incorrectly indicated their age on Prolific.

# List participants whose attention-check answer was anything other than
# "Strongly agree" (rows with a missing answer are not listed)
data_questionnaires %>%
  select(`Participant Private ID`, age_year, ideology, attention_check) %>%
  filter(attention_check != "Strongly agree") %>%
  select(-attention_check) %>%
  tt()
tinytable_lt9w6on1z7dggfvo2d88
Participant Private ID age_year ideology
11693689 35 Slightly right
11700313 24 Slightly right
# IDs of participants who failed the pre-registered attention check, derived
# once from the questionnaire data (same rule as the table above) instead of
# being hard-coded in each filter
ids_failed_attention <- data_questionnaires %>%
  filter(attention_check != "Strongly agree") %>%
  pull(`Participant Private ID`)

# Drop those participants from every data set
data_gng_f <- data_gng %>%
  filter(!(`Participant Private ID` %in% ids_failed_attention))

data_fake_news_f <- data_fake_news %>%
  filter(!(`Participant Private ID` %in% ids_failed_attention))

data_questionnaires_combined_f <- data_questionnaires_combined %>%
  filter(!(`Participant Private ID` %in% ids_failed_attention))

Check technical errors

# Participants that appear more than once in the combined questionnaires
# (count() never returns n < 1, so n > 1 is the same as n != 1)
data_questionnaires_combined %>%
  count(`Participant Private ID`, name = "n") %>%
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: Participant Private ID <dbl>, n <int>

The questionnaires look good.

# Participants whose number of Go / No-Go rows differs from 300
data_gng %>%
  count(`Participant Private ID`, name = "n") %>%
  filter(n < 300 | n > 300)
# A tibble: 2 × 2
  `Participant Private ID`     n
                     <dbl> <int>
1                 11694557   450
2                 11695853   296

For some reason, two IDs do not have exactly 300 observations. This should not happen and is a technical error from Gorilla. As the study was set up, it is not possible to have fewer or more than 300 trials, so this is an indication that there was an error and the data cannot be used.

# Participants whose number of fake news rows lies outside 25-35
data_fake_news %>%
  count(`Participant Private ID`, name = "n") %>%
  filter(!between(n, 25, 35))
# A tibble: 2 × 2
  `Participant Private ID`     n
                     <dbl> <int>
1                 11694792    55
2                 11696062    55

Another two participants have a weird number of observations and need to be removed. This should not happen and again is a technical issue from Gorilla.

Remove technical errors

This was not pre-registered, as technical errors outside of the control of the researchers cannot be anticipated.

# IDs with a technical error, defined once so the three filters cannot
# drift apart
ids_technical_error <- c(
  11694557, 11695853,  # Go / No-Go: number of rows differs from 300
  11694792, 11696062   # fake news task: number of rows outside 25-35
)

data_gng_f <- data_gng_f %>%
  filter(!(`Participant Private ID` %in% ids_technical_error))

data_fake_news_f <- data_fake_news_f %>%
  filter(!(`Participant Private ID` %in% ids_technical_error))

data_questionnaires_combined_f <- data_questionnaires_combined_f %>%
  filter(!(`Participant Private ID` %in% ids_technical_error))

Check the age of participants

I intended to recruit only 18-38 year olds - we expect some deviation as this is based on Prolific and not all information is always up to date. Let’s check if there are some participants that incorrectly stated their age.

# age_year is stored as character, so `<` / `>` would compare strings
# lexicographically (e.g. "9" > "38"). Convert to numeric first and flag
# ages that are missing or not a number explicitly.
data_questionnaires_combined_f %>%
  mutate(age_year = as.numeric(age_year)) %>%
  filter(is.na(age_year) | age_year < 18 | age_year > 38) %>%
  select(`Participant Private ID`, age_year)
# A tibble: 7 × 2
  `Participant Private ID` age_year
                     <dbl> <chr>   
1                 11693545 39      
2                 11694382 NaN     
3                 11694592 40      
4                 11694730 39      
5                 11694907 39      
6                 11695000 51      
7                 11695048 124     

I am not super concerned about the participants who are slightly above 38. However, I contacted the others on Prolific. Two of them could correct their age and reported the following ages:

  • 11695048 -> 38
  • 11694382 -> 38

So let’s create an age variable that takes the age in years for all participants, but uses the corrected values for these two.

# Convert age to numeric and add age_corrected: the reported age for
# everyone, except the two participants who corrected their age to 38
data_questionnaires_combined_f <- data_questionnaires_combined_f %>%
  mutate(
    age_year = as.numeric(age_year),
    age_corrected = case_when(
      `Participant Private ID` == 11695048 ~ 38,
      `Participant Private ID` == 11694382 ~ 38,
      .default = age_year
    )
  )

Demographics of filtered participants

# Browser, ideology and age of the six participants that were removed
# (two failed attention checks, four technical errors)
data_questionnaires_combined %>%
  select(
    `Participant Private ID`,
    `Participant Browser`,
    ideology,
    age_year
  ) %>%
  filter(
    `Participant Private ID` %in% c(
      11693689, 11700313,
      11694557, 11695853,
      11694792, 11696062
    )
  ) %>%
  tt()
tinytable_i8lq7i6pf7mkfusq9jhf
Participant Private ID Participant Browser ideology age_year
11693689 Chrome 128.0.0.0 Slightly right 35
11694557 Edge 128.0.0.0 Slightly left 32
11694792 Firefox 130.0 Slightly left 33
11696062 Chrome 128.0.0.0 Right 16
11700313 Chrome 128.0.0.0 Slightly right 24
11695853 Mobile Safari 17.6 Slightly right 34

This leaves us with the final pre-registered sample size of exactly N = 504 participants.

Save dataframes

Save different .csv data frames for further analyses.

# Make sure the output folder exists: write_csv() does not create missing
# directories, so a fresh checkout without "01_data/cleaned" would fail
cleaned_dir <- here("01_data", "cleaned")
dir.create(cleaned_dir, recursive = TRUE, showWarnings = FALSE)

# questionnaire data
write_csv(data_questionnaires_combined_f,
          file.path(cleaned_dir, "data_questionnaires_cleaned.csv"),
          na = "", append = FALSE, col_names = TRUE)

# gng data
write_csv(data_gng_f,
          file.path(cleaned_dir, "data_gng_cleaned.csv"),
          na = "", append = FALSE, col_names = TRUE)

# fake news task data
write_csv(data_fake_news_f,
          file.path(cleaned_dir, "data_fake_news_cleaned.csv"),
          na = "", append = FALSE, col_names = TRUE)